TMM normalization

goat<-read.table("../data/goat.csv")
goatt<-t(goat)
library("corrplot")
## corrplot 0.90 loaded
library("FactoMineR")
library("factoextra")
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("reshape2")
library("ggplot2")
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units

Data Analysis

summary(goat)
##    cd4_goat1           cd4_goat2           cd4_goat3        
##  Min.   :    0.000   Min.   :    0.000   Min.   :    0.000  
##  1st Qu.:    0.000   1st Qu.:    0.000   1st Qu.:    0.000  
##  Median :    0.672   Median :    0.797   Median :    0.857  
##  Mean   :   33.777   Mean   :   35.090   Mean   :   32.679  
##  3rd Qu.:   20.914   3rd Qu.:   20.940   3rd Qu.:   19.929  
##  Max.   :12587.386   Max.   :14665.324   Max.   :12121.344  
##    cd4_goat4           cd8_goat1           cd8_goat2          cd8_goat3        
##  Min.   :    0.000   Min.   :    0.000   Min.   :    0.00   Min.   :    0.000  
##  1st Qu.:    0.000   1st Qu.:    0.000   1st Qu.:    0.00   1st Qu.:    0.000  
##  Median :    0.777   Median :    0.787   Median :    0.85   Median :    0.898  
##  Mean   :   32.408   Mean   :   34.065   Mean   :   34.76   Mean   :   34.289  
##  3rd Qu.:   20.428   3rd Qu.:   21.132   3rd Qu.:   21.23   3rd Qu.:   20.485  
##  Max.   :11393.197   Max.   :12622.673   Max.   :13835.64   Max.   :12630.834  
##    cd8_goat4         cerebellum_goat1    cerebellum_goat2   
##  Min.   :    0.000   Min.   :    0.000   Min.   :    0.000  
##  1st Qu.:    0.000   1st Qu.:    0.030   1st Qu.:    0.033  
##  Median :    0.839   Median :    2.713   Median :    2.580  
##  Mean   :   33.944   Mean   :   31.835   Mean   :   30.986  
##  3rd Qu.:   20.865   3rd Qu.:   22.960   3rd Qu.:   22.700  
##  Max.   :10231.537   Max.   :20765.002   Max.   :18409.420  
##  cerebellum_goat3    cerebellum_goat4     ileum_goat4       
##  Min.   :    0.000   Min.   :    0.000   Min.   :    0.000  
##  1st Qu.:    0.036   1st Qu.:    0.037   1st Qu.:    0.029  
##  Median :    2.759   Median :    2.897   Median :    2.765  
##  Mean   :   31.843   Mean   :   31.575   Mean   :   35.768  
##  3rd Qu.:   23.463   3rd Qu.:   23.958   3rd Qu.:   24.055  
##  Max.   :19318.600   Max.   :18025.833   Max.   :13249.520  
##   liver_goat1         liver_goat2         liver_goat3        liver_goat4      
##  Min.   :     0.00   Min.   :     0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:     0.02   1st Qu.:     0.02   1st Qu.:    0.01   1st Qu.:    0.02  
##  Median :     2.62   Median :     2.76   Median :    2.21   Median :    2.83  
##  Mean   :    72.29   Mean   :    74.50   Mean   :   64.65   Mean   :   65.60  
##  3rd Qu.:    25.68   3rd Qu.:    25.19   3rd Qu.:   25.36   3rd Qu.:   25.87  
##  Max.   :129798.50   Max.   :165934.97   Max.   :97641.67   Max.   :67000.18  
##    lung_goat1         lung_goat2         lung_goat3         lung_goat4      
##  Min.   :   0.000   Min.   :   0.000   Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:   0.040   1st Qu.:   0.036   1st Qu.:   0.040   1st Qu.:   0.033  
##  Median :   3.763   Median :   3.487   Median :   3.756   Median :   3.591  
##  Mean   :  33.650   Mean   :  34.410   Mean   :  33.577   Mean   :  32.588  
##  3rd Qu.:  25.210   3rd Qu.:  25.019   3rd Qu.:  24.903   3rd Qu.:  24.499  
##  Max.   :9130.728   Max.   :8163.974   Max.   :9260.541   Max.   :8023.443  
##   muscle_goat1        muscle_goat2       muscle_goat3        muscle_goat4     
##  Min.   :     0.00   Min.   :    0.00   Min.   :     0.00   Min.   :    0.00  
##  1st Qu.:     0.00   1st Qu.:    0.00   1st Qu.:     0.00   1st Qu.:    0.00  
##  Median :     1.63   Median :    1.32   Median :     1.09   Median :    1.41  
##  Mean   :    92.10   Mean   :   82.52   Mean   :    88.09   Mean   :   75.06  
##  3rd Qu.:    21.96   3rd Qu.:   21.36   3rd Qu.:    21.09   3rd Qu.:   21.26  
##  Max.   :107851.12   Max.   :95552.58   Max.   :168102.68   Max.   :69647.12  
##   testis_goat1       testis_goat2     
##  Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:   0.213   1st Qu.:   0.217  
##  Median :   3.963   Median :   3.917  
##  Mean   :  36.527   Mean   :  33.845  
##  3rd Qu.:  27.942   3rd Qu.:  26.466  
##  Max.   :3268.795   Max.   :3054.008
boxplot(goat)

boxplot(goat,outline=FALSE) #outliers are not drawn

#percentage of 0 
percent_zero<-(colSums(goat==0)/nrow(goat))*100
plot(percent_zero,col="purple",main="Percentage of 0 per tissue and subject",type="b")

#median 
list_median<-apply(goat,2,median)
plot(list_median,col="blue",main="Median per tissue and subject",type="b",xlab="Tissues")

goat50=goatt[,1:50]
lapply(1:ncol(goat50), function(i) hist(goat50[,i],xlab="Expression",main=paste("Histogram of" ,colnames(goat50)[i])))

## [[1]]
## $breaks
## [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
## 
## $counts
## [1] 22  1  0  0  1  1  1  1
## 
## $density
## [1] 1.62962963 0.07407407 0.00000000 0.00000000 0.07407407 0.07407407 0.07407407
## [8] 0.07407407
## 
## $mids
## [1] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[2]]
## $breaks
## [1]    0  500 1000 1500 2000
## 
## $counts
## [1] 21  3  1  2
## 
## $density
## [1] 1.555556e-03 2.222222e-04 7.407407e-05 1.481481e-04
## 
## $mids
## [1]  250  750 1250 1750
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[3]]
## $breaks
## [1]  0  2  4  6  8 10 12 14
## 
## $counts
## [1] 23  0  0  0  0  2  2
## 
## $density
## [1] 0.42592593 0.00000000 0.00000000 0.00000000 0.00000000 0.03703704 0.03703704
## 
## $mids
## [1]  1  3  5  7  9 11 13
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[4]]
## $breaks
## [1]    0  500 1000 1500 2000 2500 3000
## 
## $counts
## [1] 14  5  0  5  2  1
## 
## $density
## [1] 1.037037e-03 3.703704e-04 0.000000e+00 3.703704e-04 1.481481e-04
## [6] 7.407407e-05
## 
## $mids
## [1]  250  750 1250 1750 2250 2750
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[5]]
## $breaks
## [1]   0  20  40  60  80 100 120 140
## 
## $counts
## [1] 19  0  3  1  0  2  2
## 
## $density
## [1] 0.035185185 0.000000000 0.005555556 0.001851852 0.000000000 0.003703704
## [7] 0.003703704
## 
## $mids
## [1]  10  30  50  70  90 110 130
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[6]]
## $breaks
## [1]    0  500 1000 1500 2000 2500
## 
## $counts
## [1] 11  8  4  2  2
## 
## $density
## [1] 0.0008148148 0.0005925926 0.0002962963 0.0001481481 0.0001481481
## 
## $mids
## [1]  250  750 1250 1750 2250
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[7]]
## $breaks
## [1]  0  5 10 15 20
## 
## $counts
## [1] 13  5  7  2
## 
## $density
## [1] 0.09629630 0.03703704 0.05185185 0.01481481
## 
## $mids
## [1]  2.5  7.5 12.5 17.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[8]]
## $breaks
## [1]  0  5 10 15 20 25
## 
## $counts
## [1] 12  3  7  4  1
## 
## $density
## [1] 0.088888889 0.022222222 0.051851852 0.029629630 0.007407407
## 
## $mids
## [1]  2.5  7.5 12.5 17.5 22.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[9]]
## $breaks
## [1]  0 10 20 30 40 50 60 70
## 
## $counts
## [1] 13  8  2  0  2  1  1
## 
## $density
## [1] 0.048148148 0.029629630 0.007407407 0.000000000 0.007407407 0.003703704
## [7] 0.003703704
## 
## $mids
## [1]  5 15 25 35 45 55 65
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[10]]
## $breaks
## [1]    0 1000 2000 3000 4000 5000 6000 7000
## 
## $counts
## [1] 2 3 7 5 2 6 2
## 
## $density
## [1] 7.407407e-05 1.111111e-04 2.592593e-04 1.851852e-04 7.407407e-05
## [6] 2.222222e-04 7.407407e-05
## 
## $mids
## [1]  500 1500 2500 3500 4500 5500 6500
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[11]]
## $breaks
## [1] 0 1 2 3 4 5 6
## 
## $counts
## [1] 17  5  3  0  1  1
## 
## $density
## [1] 0.62962963 0.18518519 0.11111111 0.00000000 0.03703704 0.03703704
## 
## $mids
## [1] 0.5 1.5 2.5 3.5 4.5 5.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[12]]
## $breaks
## [1] 0 1 2 3 4 5
## 
## $counts
## [1] 17  5  3  1  1
## 
## $density
## [1] 0.62962963 0.18518519 0.11111111 0.03703704 0.03703704
## 
## $mids
## [1] 0.5 1.5 2.5 3.5 4.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[13]]
## $breaks
## [1] 0.0 0.5 1.0 1.5 2.0
## 
## $counts
## [1] 17  7  1  2
## 
## $density
## [1] 1.25925926 0.51851852 0.07407407 0.14814815
## 
## $mids
## [1] 0.25 0.75 1.25 1.75
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[14]]
## $breaks
## [1]  0  5 10 15 20 25
## 
## $counts
## [1] 13  9  2  1  2
## 
## $density
## [1] 0.096296296 0.066666667 0.014814815 0.007407407 0.014814815
## 
## $mids
## [1]  2.5  7.5 12.5 17.5 22.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[15]]
## $breaks
## [1]  0 10 20 30 40 50 60 70
## 
## $counts
## [1] 11  7  6  1  0  1  1
## 
## $density
## [1] 0.040740741 0.025925926 0.022222222 0.003703704 0.000000000 0.003703704
## [7] 0.003703704
## 
## $mids
## [1]  5 15 25 35 45 55 65
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[16]]
## $breaks
## [1]     0 10000 20000 30000 40000 50000
## 
## $counts
## [1] 12  8  3  2  2
## 
## $density
## [1] 4.444444e-05 2.962963e-05 1.111111e-05 7.407407e-06 7.407407e-06
## 
## $mids
## [1]  5000 15000 25000 35000 45000
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[17]]
## $breaks
## [1]   0 100 200 300 400 500 600 700
## 
## $counts
## [1]  6 10  5  1  3  0  2
## 
## $density
## [1] 0.0022222222 0.0037037037 0.0018518519 0.0003703704 0.0011111111
## [6] 0.0000000000 0.0007407407
## 
## $mids
## [1]  50 150 250 350 450 550 650
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[18]]
## $breaks
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
## 
## $counts
## [1]  4 11  6  2  2  0  1  1
## 
## $density
## [1] 1.4814815 4.0740741 2.2222222 0.7407407 0.7407407 0.0000000 0.3703704
## [8] 0.3703704
## 
## $mids
## [1] 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[19]]
## $breaks
## [1]    0  500 1000 1500 2000 2500
## 
## $counts
## [1] 14  9  0  3  1
## 
## $density
## [1] 1.037037e-03 6.666667e-04 0.000000e+00 2.222222e-04 7.407407e-05
## 
## $mids
## [1]  250  750 1250 1750 2250
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[20]]
## $breaks
## [1] 0.00 0.02 0.04 0.06 0.08 0.10 0.12
## 
## $counts
## [1] 20  1  0  2  2  2
## 
## $density
## [1] 37.037037  1.851852  0.000000  3.703704  3.703704  3.703704
## 
## $mids
## [1] 0.01 0.03 0.05 0.07 0.09 0.11
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[21]]
## $breaks
## [1]  0 10 20 30 40 50 60 70
## 
## $counts
## [1]  2 12  4  4  2  1  2
## 
## $density
## [1] 0.007407407 0.044444444 0.014814815 0.014814815 0.007407407 0.003703704
## [7] 0.007407407
## 
## $mids
## [1]  5 15 25 35 45 55 65
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[22]]
## $breaks
## [1]    0  500 1000 1500 2000 2500 3000 3500
## 
## $counts
## [1]  7 11  4  1  2  1  1
## 
## $density
## [1] 5.185185e-04 8.148148e-04 2.962963e-04 7.407407e-05 1.481481e-04
## [6] 7.407407e-05 7.407407e-05
## 
## $mids
## [1]  250  750 1250 1750 2250 2750 3250
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[23]]
## $breaks
## [1]    0 1000 2000 3000 4000 5000 6000 7000 8000
## 
## $counts
## [1]  3 12  3  3  2  2  1  1
## 
## $density
## [1] 1.111111e-04 4.444444e-04 1.111111e-04 1.111111e-04 7.407407e-05
## [6] 7.407407e-05 3.703704e-05 3.703704e-05
## 
## $mids
## [1]  500 1500 2500 3500 4500 5500 6500 7500
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[24]]
## $breaks
## [1]  0  5 10 15 20 25
## 
## $counts
## [1] 20  3  1  2  1
## 
## $density
## [1] 0.148148148 0.022222222 0.007407407 0.014814815 0.007407407
## 
## $mids
## [1]  2.5  7.5 12.5 17.5 22.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[25]]
## $breaks
##  [1]    0  200  400  600  800 1000 1200 1400 1600 1800
## 
## $counts
## [1]  5 11  3  1  3  0  1  0  3
## 
## $density
## [1] 0.0009259259 0.0020370370 0.0005555556 0.0001851852 0.0005555556
## [6] 0.0000000000 0.0001851852 0.0000000000 0.0005555556
## 
## $mids
## [1]  100  300  500  700  900 1100 1300 1500 1700
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[26]]
## $breaks
## [1] 0 1 2 3 4 5 6 7
## 
## $counts
## [1]  9 12  2  0  2  0  2
## 
## $density
## [1] 0.33333333 0.44444444 0.07407407 0.00000000 0.07407407 0.00000000 0.07407407
## 
## $mids
## [1] 0.5 1.5 2.5 3.5 4.5 5.5 6.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[27]]
## $breaks
## [1]    0  500 1000 1500 2000
## 
## $counts
## [1]  8 13  3  3
## 
## $density
## [1] 0.0005925926 0.0009629630 0.0002222222 0.0002222222
## 
## $mids
## [1]  250  750 1250 1750
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[28]]
## $breaks
## [1]     0  2000  4000  6000  8000 10000
## 
## $counts
## [1]  5 13  5  1  3
## 
## $density
## [1] 9.259259e-05 2.407407e-04 9.259259e-05 1.851852e-05 5.555556e-05
## 
## $mids
## [1] 1000 3000 5000 7000 9000
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[29]]
## $breaks
## [1] 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35
## 
## $counts
## [1] 6 3 5 3 4 4 2
## 
## $density
## [1] 4.444444 2.222222 3.703704 2.222222 2.962963 2.962963 1.481481
## 
## $mids
## [1] 0.025 0.075 0.125 0.175 0.225 0.275 0.325
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[30]]
## $breaks
## [1] 0.00 0.02 0.04 0.06 0.08 0.10 0.12
## 
## $counts
## [1] 19  5  2  0  0  1
## 
## $density
## [1] 35.185185  9.259259  3.703704  0.000000  0.000000  1.851852
## 
## $mids
## [1] 0.01 0.03 0.05 0.07 0.09 0.11
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[31]]
## $breaks
## [1] 0.0 0.2 0.4 0.6 0.8 1.0
## 
## $counts
## [1] 23  0  1  2  1
## 
## $density
## [1] 4.2592593 0.0000000 0.1851852 0.3703704 0.1851852
## 
## $mids
## [1] 0.1 0.3 0.5 0.7 0.9
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[32]]
## $breaks
## [1]    0 1000 2000 3000 4000 5000 6000
## 
## $counts
## [1] 11  8  4  0  3  1
## 
## $density
## [1] 4.074074e-04 2.962963e-04 1.481481e-04 0.000000e+00 1.111111e-04
## [6] 3.703704e-05
## 
## $mids
## [1]  500 1500 2500 3500 4500 5500
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[33]]
## $breaks
## [1]   0 100 200 300 400 500 600
## 
## $counts
## [1] 16  3  4  1  2  1
## 
## $density
## [1] 0.0059259259 0.0011111111 0.0014814815 0.0003703704 0.0007407407
## [6] 0.0003703704
## 
## $mids
## [1]  50 150 250 350 450 550
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[34]]
## $breaks
## [1]   0  20  40  60  80 100
## 
## $counts
## [1] 18  1  6  1  1
## 
## $density
## [1] 0.033333333 0.001851852 0.011111111 0.001851852 0.001851852
## 
## $mids
## [1] 10 30 50 70 90
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[35]]
## $breaks
##  [1]    0 1000 2000 3000 4000 5000 6000 7000 8000 9000
## 
## $counts
## [1]  2 13  2  5  1  0  0  3  1
## 
## $density
## [1] 7.407407e-05 4.814815e-04 7.407407e-05 1.851852e-04 3.703704e-05
## [6] 0.000000e+00 0.000000e+00 1.111111e-04 3.703704e-05
## 
## $mids
## [1]  500 1500 2500 3500 4500 5500 6500 7500 8500
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[36]]
## $breaks
## [1]  0  2  4  6  8 10 12
## 
## $counts
## [1] 23  0  0  1  1  2
## 
## $density
## [1] 0.42592593 0.00000000 0.00000000 0.01851852 0.01851852 0.03703704
## 
## $mids
## [1]  1  3  5  7  9 11
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[37]]
## $breaks
## [1]  0  5 10 15 20
## 
## $counts
## [1] 23  0  2  2
## 
## $density
## [1] 0.17037037 0.00000000 0.01481481 0.01481481
## 
## $mids
## [1]  2.5  7.5 12.5 17.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[38]]
## $breaks
## [1] 0.0 0.2 0.4 0.6 0.8 1.0
## 
## $counts
## [1] 21  2  0  1  3
## 
## $density
## [1] 3.8888889 0.3703704 0.0000000 0.1851852 0.5555556
## 
## $mids
## [1] 0.1 0.3 0.5 0.7 0.9
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[39]]
## $breaks
## [1]  0 10 20 30 40 50 60
## 
## $counts
## [1] 26  0  0  0  0  1
## 
## $density
## [1] 0.096296296 0.000000000 0.000000000 0.000000000 0.000000000 0.003703704
## 
## $mids
## [1]  5 15 25 35 45 55
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[40]]
## $breaks
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
## 
## $counts
## [1] 25  0  0  0  0  0  1  1
## 
## $density
## [1] 9.2592593 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.3703704
## [8] 0.3703704
## 
## $mids
## [1] 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[41]]
## $breaks
## [1]   0 100 200 300 400 500
## 
## $counts
## [1] 25  0  1  0  1
## 
## $density
## [1] 0.0092592593 0.0000000000 0.0003703704 0.0000000000 0.0003703704
## 
## $mids
## [1]  50 150 250 350 450
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[42]]
## $breaks
## [1]  0  2  4  6  8 10 12 14 16
## 
## $counts
## [1] 19  1  1  0  1  3  1  1
## 
## $density
## [1] 0.35185185 0.01851852 0.01851852 0.00000000 0.01851852 0.05555556 0.01851852
## [8] 0.01851852
## 
## $mids
## [1]  1  3  5  7  9 11 13 15
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[43]]
## $breaks
## [1] 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40
## 
## $counts
## [1] 22  2  1  0  0  1  0  1
## 
## $density
## [1] 16.2962963  1.4814815  0.7407407  0.0000000  0.0000000  0.7407407  0.0000000
## [8]  0.7407407
## 
## $mids
## [1] 0.025 0.075 0.125 0.175 0.225 0.275 0.325 0.375
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[44]]
## $breaks
## [1]  0  5 10 15 20 25 30
## 
## $counts
## [1] 25  0  0  0  1  1
## 
## $density
## [1] 0.185185185 0.000000000 0.000000000 0.000000000 0.007407407 0.007407407
## 
## $mids
## [1]  2.5  7.5 12.5 17.5 22.5 27.5
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[45]]
## $breaks
## [1] 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14
## 
## $counts
## [1] 19  2  1  2  1  1  1
## 
## $density
## [1] 35.185185  3.703704  1.851852  3.703704  1.851852  1.851852  1.851852
## 
## $mids
## [1] 0.01 0.03 0.05 0.07 0.09 0.11 0.13
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[46]]
## $breaks
## [1]  0  2  4  6  8 10
## 
## $counts
## [1] 25  0  0  0  2
## 
## $density
## [1] 0.46296296 0.00000000 0.00000000 0.00000000 0.03703704
## 
## $mids
## [1] 1 3 5 7 9
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[47]]
## $breaks
## [1]  0  2  4  6  8 10 12 14
## 
## $counts
## [1] 8 0 4 2 5 4 4
## 
## $density
## [1] 0.14814815 0.00000000 0.07407407 0.03703704 0.09259259 0.07407407 0.07407407
## 
## $mids
## [1]  1  3  5  7  9 11 13
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[48]]
## $breaks
## [1]   0 100 200 300 400 500 600 700 800
## 
## $counts
## [1]  9 14  0  0  0  0  0  4
## 
## $density
## [1] 0.003333333 0.005185185 0.000000000 0.000000000 0.000000000 0.000000000
## [7] 0.000000000 0.001481481
## 
## $mids
## [1]  50 150 250 350 450 550 650 750
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[49]]
## $breaks
## [1] 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4
## 
## $counts
## [1] 25  0  1  0  0  0  1
## 
## $density
## [1] 4.6296296 0.0000000 0.1851852 0.0000000 0.0000000 0.0000000 0.1851852
## 
## $mids
## [1] 0.1 0.3 0.5 0.7 0.9 1.1 1.3
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[50]]
## $breaks
## [1]    0 1000 2000 3000 4000 5000 6000
## 
## $counts
## [1] 23  0  1  0  1  2
## 
## $density
## [1] 8.518519e-04 0.000000e+00 3.703704e-05 0.000000e+00 3.703704e-05
## [6] 7.407407e-05
## 
## $mids
## [1]  500 1500 2500 3500 4500 5500
## 
## $xname
## [1] "goat50[, i]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
tissue_split <- strsplit(colnames(goat),"_")
tissue <- sapply(tissue_split,function(x){return (x[1])})
goat2 <- goat
colnames(goat2) <- tissue
goat2 <- melt(as.matrix(goat2))
colnames(goat2) <- c("Gene","Tissue","Expression")
ggplot(data=goat2,aes(x=Tissue,y=Expression,color=Tissue))+geom_boxplot()+ labs(title="Goat tissues boxplots")+theme(legend.position="none")

eps=1e-6
ggplot(data=goat2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+ labs(title="Goat tissues boxplots")+theme(legend.position="none")

Principal Component Analysis

goat_acp<-PCA(goatt,graph=FALSE) #the individuals are the tissues and the variables are the genes 
fviz_pca_ind (goat_acp, col.ind = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Évite le chevauchement de texte
             )
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

fviz_eig(goat_acp, addlabels = TRUE) #to visualize how many components are necessary 

corrplot(cor(goat),method="ellipse")

corrplot(cor(goatt[,1:30]),method="ellipse")

corrplot(cor(goatt[,1:50]),method="ellipse")

corrplot(cor(goatt[,1000:1030]),method="ellipse")

Loess Normalization

goat_loess<-read.table("../data/goat_loess.csv")
goat_loesst<-t(goat_loess)

Data Analysis

summary(goat_loess)
##    cd4_goat1         cd4_goat2          cd4_goat3          cd4_goat4       
##  Min.   :   0.00   Min.   :   0.000   Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:   0.00   1st Qu.:   0.000   1st Qu.:   0.000   1st Qu.:   0.000  
##  Median :   1.42   Median :   1.579   Median :   1.697   Median :   1.559  
##  Mean   :  29.01   Mean   :  29.597   Mean   :  28.741   Mean   :  28.642  
##  3rd Qu.:  23.48   3rd Qu.:  23.147   3rd Qu.:  21.917   3rd Qu.:  22.467  
##  Max.   :6702.30   Max.   :6607.418   Max.   :6546.953   Max.   :7038.558  
##    cd8_goat1         cd8_goat2          cd8_goat3          cd8_goat4       
##  Min.   :   0.00   Min.   :   0.000   Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:   0.00   1st Qu.:   0.000   1st Qu.:   0.000   1st Qu.:   0.000  
##  Median :   1.45   Median :   1.516   Median :   1.624   Median :   1.573  
##  Mean   :  28.52   Mean   :  28.861   Mean   :  29.751   Mean   :  29.785  
##  3rd Qu.:  22.75   3rd Qu.:  22.715   3rd Qu.:  22.511   3rd Qu.:  22.920  
##  Max.   :6067.88   Max.   :6179.076   Max.   :6404.083   Max.   :6400.909  
##  cerebellum_goat1   cerebellum_goat2   cerebellum_goat3   cerebellum_goat4  
##  Min.   :    0.00   Min.   :    0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:    0.03   1st Qu.:    0.03   1st Qu.:    0.03   1st Qu.:    0.03  
##  Median :    1.93   Median :    1.92   Median :    1.92   Median :    1.93  
##  Mean   :   43.37   Mean   :   44.64   Mean   :   43.92   Mean   :   40.39  
##  3rd Qu.:   22.71   3rd Qu.:   22.93   3rd Qu.:   22.84   3rd Qu.:   22.65  
##  Max.   :80599.58   Max.   :89199.93   Max.   :83855.13   Max.   :62475.01  
##   ileum_goat4         liver_goat1        liver_goat2         liver_goat3      
##  Min.   :    0.000   Min.   :    0.00   Min.   :     0.00   Min.   :    0.00  
##  1st Qu.:    0.032   1st Qu.:    0.02   1st Qu.:     0.02   1st Qu.:    0.01  
##  Median :    2.358   Median :    2.14   Median :     2.23   Median :    2.09  
##  Mean   :   36.352   Mean   :   61.20   Mean   :    64.70   Mean   :   58.74  
##  3rd Qu.:   23.983   3rd Qu.:   23.68   3rd Qu.:    23.44   3rd Qu.:   23.99  
##  Max.   :15633.909   Max.   :78155.61   Max.   :100416.48   Max.   :74426.64  
##   liver_goat4         lung_goat1         lung_goat2         lung_goat3      
##  Min.   :    0.00   Min.   :   0.000   Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:    0.02   1st Qu.:   0.030   1st Qu.:   0.025   1st Qu.:   0.028  
##  Median :    2.20   Median :   2.189   Median :   2.133   Median :   2.279  
##  Mean   :   60.04   Mean   :  29.228   Mean   :  30.161   Mean   :  29.716  
##  3rd Qu.:   23.29   3rd Qu.:  21.674   3rd Qu.:  21.617   3rd Qu.:  21.831  
##  Max.   :56324.83   Max.   :7720.992   Max.   :7011.222   Max.   :7485.892  
##    lung_goat4        muscle_goat1       muscle_goat2       muscle_goat3     
##  Min.   :   0.000   Min.   :    0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:   0.029   1st Qu.:    0.00   1st Qu.:    0.00   1st Qu.:    0.00  
##  Median :   2.275   Median :    1.93   Median :    1.79   Median :    1.64  
##  Mean   :  30.360   Mean   :   63.79   Mean   :   63.24   Mean   :   66.46  
##  3rd Qu.:  22.030   3rd Qu.:   23.08   3rd Qu.:   23.79   3rd Qu.:   24.72  
##  Max.   :8257.394   Max.   :38837.86   Max.   :40558.02   Max.   :72589.46  
##   muscle_goat4        testis_goat1       testis_goat2     
##  Min.   :    0.000   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:    0.000   1st Qu.:    0.04   1st Qu.:    0.05  
##  Median :    1.835   Median :    2.37   Median :    2.40  
##  Mean   :   57.033   Mean   :   63.51   Mean   :   64.39  
##  3rd Qu.:   23.789   3rd Qu.:   27.00   3rd Qu.:   26.67  
##  Max.   :28664.987   Max.   :65135.92   Max.   :77294.82
boxplot(goat_loess)

boxplot(goat_loess,outline=FALSE) #outliers are not drawn

#percentage of 0 
percent_zero<-(colSums(goat_loess==0)/nrow(goat_loess))*100
plot(percent_zero,col="purple",main="Percentage of 0 per tissue and subject",type="b")

#median 
list_median<-apply(goat_loess,2,median)
plot(list_median,col="blue",main="Median per tissue and subject",type="b",xlab="Tissues")

tissue_split <- strsplit(colnames(goat),"_")
tissue <- sapply(tissue_split,function(x){return (x[1])})
goat_loess2 <- goat_loess
colnames(goat_loess2) <- tissue
goat_loess2 <- melt(as.matrix(goat_loess2))
colnames(goat_loess2) <- c("Gene","Tissue","Expression")
ggplot(data=goat_loess2,aes(x=Tissue,y=Expression,color=Tissue))+geom_boxplot()+labs(title="goat_loess tissues boxplots")+ theme(legend.position="none")

When choosing to fill in colors the outliers on red, all points are colored in red less the horizontal bar on level 0.

eps=1e-6
ggplot(data=goat_loess2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+labs(title="goat_loess tissues boxplots")+ theme(legend.position="none")

Principal Component Analysis

goat_loess_acp<-PCA(goat_loesst,graph=FALSE) #the individuals are the tissues and the variables are the genes 
fviz_pca_ind (goat_loess_acp, col.ind = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Évite le chevauchement de texte
             )
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

fviz_eig(goat_loess_acp, addlabels = TRUE) #to visualize how many components are necessary 

corrplot(cor(goat),method="ellipse")

corrplot(cor(goat_loess),method="ellipse")

TPM NORMALIZATION DATA ANALYSIS

goat_tpm<-read.table("../data/goat_tpm.csv")

#transformations needed to have the same matrix structure that other goats data 
#lines
rownames(goat_tpm)<-as.character(goat_tpm[,1])
#colomns
colnames(goat_tpm)<-as.character(goat_tpm[1,])
goat_tpm<-goat_tpm[c(-1),c(-1)]
goat_tpm<-type.convert(goat_tpm) #by defaul numbers were as characters instead of numeric

goat_tpmt<-t(goat_tpm)
boxplot(goat_tpm)

boxplot(goat_tpm,outline=FALSE) #outliers are not drawn

#percentage of 0 
percent_zero<-(colSums(goat_tpm==0)/nrow(goat_tpm))*100
plot(percent_zero,col="purple",main="Percentage of 0 per tissue and subject",type="b")

#median 
list_median<-apply(goat_tpm,2,median)
plot(list_median,col="blue",main="Median per tissue and subject",type="b",xlab="Tissues")

tissue_split <- strsplit(colnames(goat_tpm),"_")
tissue <- sapply(tissue_split,function(x){return (x[1])})
goat_tpm2 <- goat_tpm
colnames(goat_tpm2) <- tissue
goat_tpm2 <- melt(as.matrix(goat_tpm2))
colnames(goat_tpm2) <- c("Gene","Tissue","Expression")
ggplot(data=goat_tpm2,aes(x=Tissue,y=Expression,color=Tissue))+geom_boxplot()+labs(title="goat_tpm tissues boxplots")+ theme(legend.position="none")

eps=1e-6
ggplot(data=goat_tpm2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+labs(title="goat_tpm tissues boxplots")+ theme(legend.position="none")

Principal Component Analysis and comparison with other normalizatons

library("FactoMineR")
library("factoextra")
goat_tpm_acp<-PCA(goat_tpmt,graph=FALSE) #the individuals are the tissues and the variables are the genes 
fviz_pca_ind (goat_tpm_acp, col.ind = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Évite le chevauchement de texte
             )
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

fviz_eig(goat_tpm_acp, addlabels = TRUE) #to visualize how many components are necessary 

corrplot(cor(goat),method="ellipse")

corrplot(cor(goat_loess),method="ellipse")

corrplot(cor(goat_tpm),method="ellipse")

A significant difference is testis that is correlated to several tissues and mostly the cerebellum. LIver and muscle not significantly correlated to others in all three normalizations.

par(mfrow=c(1,3))
ind_goat<-get_pca_ind(goat_acp)
corrplot(ind_goat$cos2, is.corr=FALSE,title = "TMM normalization",mar=c(0,0,2,0))
ind_goat_loess<-get_pca_ind(goat_loess_acp)
corrplot(ind_goat_loess$cos2, is.corr=FALSE,title = "Loess normalization",mar=c(0,0,2,0))
ind_goat_tpm<-get_pca_ind(goat_tpm_acp)
corrplot(ind_goat_tpm$cos2, is.corr=FALSE,title = "TPM normalization",mar=c(0,0,2,0))

For all normalizations testis represent around 80% or more dimension 1. For the others, we have essenciall cerebellum ans lung that contribute fort the other dimensions.

par(mfrow=c(2,2))
eps=1e-6
ggplot(data=goat2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+ labs(title="Goat tissues boxplots")+theme(legend.position="none")

ggplot(data=goat_loess2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+ labs(title="Goat_loess tissues boxplots")+theme(legend.position="none")

ggplot(data=goat_tpm2,aes(x=Tissue,y=log(Expression+eps),color=Tissue))+geom_boxplot()+ labs(title="Goat_tpm tissues boxplots")+theme(legend.position="none")

par(mfrow=c(1,3))
fviz_eig(goat_acp, addlabels = TRUE) #to visualize how many components are necessary 

fviz_eig(goat_loess_acp, addlabels = TRUE) #to visualize how many components are necessary 

fviz_eig(goat_tpm_acp, addlabels = TRUE) #to visualize how many components are necessary 

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] Hmisc_4.5-0      Formula_1.2-4    survival_3.2-11  lattice_0.20-44 
## [5] reshape2_1.4.4   factoextra_1.0.7 ggplot2_3.3.5    FactoMineR_2.4  
## [9] corrplot_0.90   
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.0           tidyr_1.1.3          jsonlite_1.7.2      
##  [4] splines_4.0.4        carData_3.0-4        bslib_0.2.5.1       
##  [7] assertthat_0.2.1     highr_0.9            latticeExtra_0.6-29 
## [10] cellranger_1.1.0     yaml_2.2.1           ggrepel_0.9.1       
## [13] pillar_1.6.1         backports_1.2.1      glue_1.4.2          
## [16] digest_0.6.27        RColorBrewer_1.1-2   ggsignif_0.6.2      
## [19] checkmate_2.0.0      colorspace_2.0-2     htmltools_0.5.1.1   
## [22] Matrix_1.3-3         plyr_1.8.6           pkgconfig_2.0.3     
## [25] broom_0.7.8          haven_2.4.1          purrr_0.3.4         
## [28] scales_1.1.1         openxlsx_4.2.4       jpeg_0.1-8.1        
## [31] rio_0.5.27           htmlTable_2.2.1      tibble_3.1.2        
## [34] car_3.0-11           generics_0.1.0       farver_2.1.0        
## [37] ellipsis_0.3.2       ggpubr_0.4.0         DT_0.18             
## [40] withr_2.4.2          nnet_7.3-16          readxl_1.3.1        
## [43] magrittr_2.0.1       crayon_1.4.1         evaluate_0.14       
## [46] fansi_0.5.0          MASS_7.3-54          forcats_0.5.1       
## [49] rstatix_0.7.0        foreign_0.8-81       tools_4.0.4         
## [52] data.table_1.14.0    hms_1.1.0            lifecycle_1.0.0     
## [55] stringr_1.4.0        munsell_0.5.0        zip_2.2.0           
## [58] cluster_2.1.2        flashClust_1.01-2    compiler_4.0.4      
## [61] jquerylib_0.1.4      rlang_0.4.11         grid_4.0.4          
## [64] rstudioapi_0.13      htmlwidgets_1.5.3    leaps_3.1           
## [67] base64enc_0.1-3      labeling_0.4.2       rmarkdown_2.14      
## [70] gtable_0.3.0         abind_1.4-5          curl_4.3.2          
## [73] DBI_1.1.1            R6_2.5.0             gridExtra_2.3       
## [76] knitr_1.33           dplyr_1.0.7          utf8_1.2.1          
## [79] stringi_1.6.2        Rcpp_1.0.7           vctrs_0.3.8         
## [82] rpart_4.1-15         png_0.1-7            scatterplot3d_0.3-41
## [85] tidyselect_1.1.1     xfun_0.31